In [1]:
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt
from matplotlib.pyplot import show

Reading Training Dataset¶

In [2]:
df1=pd.read_csv('training_set.csv')
In [3]:
df1
Out[3]:
Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape LandContour Utilities ... PoolArea PoolQC Fence MiscFeature MiscVal MoSold YrSold SaleType SaleCondition SalePrice
0 1 60 RL 65.0 8450 Pave NaN Reg Lvl AllPub ... 0 NaN NaN NaN 0 2 2008 WD Normal 208500
1 2 20 RL 80.0 9600 Pave NaN Reg Lvl AllPub ... 0 NaN NaN NaN 0 5 2007 WD Normal 181500
2 3 60 RL 68.0 11250 Pave NaN IR1 Lvl AllPub ... 0 NaN NaN NaN 0 9 2008 WD Normal 223500
3 4 70 RL 60.0 9550 Pave NaN IR1 Lvl AllPub ... 0 NaN NaN NaN 0 2 2006 WD Abnorml 140000
4 5 60 RL 84.0 14260 Pave NaN IR1 Lvl AllPub ... 0 NaN NaN NaN 0 12 2008 WD Normal 250000
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1455 1456 60 RL 62.0 7917 Pave NaN Reg Lvl AllPub ... 0 NaN NaN NaN 0 8 2007 WD Normal 175000
1456 1457 20 RL 85.0 13175 Pave NaN Reg Lvl AllPub ... 0 NaN MnPrv NaN 0 2 2010 WD Normal 210000
1457 1458 70 RL 66.0 9042 Pave NaN Reg Lvl AllPub ... 0 NaN GdPrv Shed 2500 5 2010 WD Normal 266500
1458 1459 20 RL 68.0 9717 Pave NaN Reg Lvl AllPub ... 0 NaN NaN NaN 0 4 2010 WD Normal 142125
1459 1460 20 RL 75.0 9937 Pave NaN Reg Lvl AllPub ... 0 NaN NaN NaN 0 6 2008 WD Normal 147500

1460 rows × 81 columns

In [4]:
df1.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 81 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             1460 non-null   int64  
 1   MSSubClass     1460 non-null   int64  
 2   MSZoning       1460 non-null   object 
 3   LotFrontage    1201 non-null   float64
 4   LotArea        1460 non-null   int64  
 5   Street         1460 non-null   object 
 6   Alley          91 non-null     object 
 7   LotShape       1460 non-null   object 
 8   LandContour    1460 non-null   object 
 9   Utilities      1460 non-null   object 
 10  LotConfig      1460 non-null   object 
 11  LandSlope      1460 non-null   object 
 12  Neighborhood   1460 non-null   object 
 13  Condition1     1460 non-null   object 
 14  Condition2     1460 non-null   object 
 15  BldgType       1460 non-null   object 
 16  HouseStyle     1460 non-null   object 
 17  OverallQual    1460 non-null   int64  
 18  OverallCond    1460 non-null   int64  
 19  YearBuilt      1460 non-null   int64  
 20  YearRemodAdd   1460 non-null   int64  
 21  RoofStyle      1460 non-null   object 
 22  RoofMatl       1460 non-null   object 
 23  Exterior1st    1460 non-null   object 
 24  Exterior2nd    1460 non-null   object 
 25  MasVnrType     1452 non-null   object 
 26  MasVnrArea     1452 non-null   float64
 27  ExterQual      1460 non-null   object 
 28  ExterCond      1460 non-null   object 
 29  Foundation     1460 non-null   object 
 30  BsmtQual       1423 non-null   object 
 31  BsmtCond       1423 non-null   object 
 32  BsmtExposure   1422 non-null   object 
 33  BsmtFinType1   1423 non-null   object 
 34  BsmtFinSF1     1460 non-null   int64  
 35  BsmtFinType2   1422 non-null   object 
 36  BsmtFinSF2     1460 non-null   int64  
 37  BsmtUnfSF      1460 non-null   int64  
 38  TotalBsmtSF    1460 non-null   int64  
 39  Heating        1460 non-null   object 
 40  HeatingQC      1460 non-null   object 
 41  CentralAir     1460 non-null   object 
 42  Electrical     1459 non-null   object 
 43  1stFlrSF       1460 non-null   int64  
 44  2ndFlrSF       1460 non-null   int64  
 45  LowQualFinSF   1460 non-null   int64  
 46  GrLivArea      1460 non-null   int64  
 47  BsmtFullBath   1460 non-null   int64  
 48  BsmtHalfBath   1460 non-null   int64  
 49  FullBath       1460 non-null   int64  
 50  HalfBath       1460 non-null   int64  
 51  BedroomAbvGr   1460 non-null   int64  
 52  KitchenAbvGr   1460 non-null   int64  
 53  KitchenQual    1460 non-null   object 
 54  TotRmsAbvGrd   1460 non-null   int64  
 55  Functional     1460 non-null   object 
 56  Fireplaces     1460 non-null   int64  
 57  FireplaceQu    770 non-null    object 
 58  GarageType     1379 non-null   object 
 59  GarageYrBlt    1379 non-null   float64
 60  GarageFinish   1379 non-null   object 
 61  GarageCars     1460 non-null   int64  
 62  GarageArea     1460 non-null   int64  
 63  GarageQual     1379 non-null   object 
 64  GarageCond     1379 non-null   object 
 65  PavedDrive     1460 non-null   object 
 66  WoodDeckSF     1460 non-null   int64  
 67  OpenPorchSF    1460 non-null   int64  
 68  EnclosedPorch  1460 non-null   int64  
 69  3SsnPorch      1460 non-null   int64  
 70  ScreenPorch    1460 non-null   int64  
 71  PoolArea       1460 non-null   int64  
 72  PoolQC         7 non-null      object 
 73  Fence          281 non-null    object 
 74  MiscFeature    54 non-null     object 
 75  MiscVal        1460 non-null   int64  
 76  MoSold         1460 non-null   int64  
 77  YrSold         1460 non-null   int64  
 78  SaleType       1460 non-null   object 
 79  SaleCondition  1460 non-null   object 
 80  SalePrice      1460 non-null   int64  
dtypes: float64(3), int64(35), object(43)
memory usage: 924.0+ KB
In [5]:
df1['SaleCondition'].value_counts()
Out[5]:
Normal     1198
Partial     125
Abnorml     101
Family       20
Alloca       12
AdjLand       4
Name: SaleCondition, dtype: int64
In [6]:
df1.shape
Out[6]:
(1460, 81)

Missing Data Treatment (Filling Null values)¶

In [7]:
df1.isna().sum()
Out[7]:
Id                 0
MSSubClass         0
MSZoning           0
LotFrontage      259
LotArea            0
                ... 
MoSold             0
YrSold             0
SaleType           0
SaleCondition      0
SalePrice          0
Length: 81, dtype: int64
In [8]:
for i in df1.columns:
        if (df1[i].isna().sum())>0:
            if df1[i].dtypes=='object':
                x=df1[i].mode()[0]
                df1[i]=df1[i].fillna(x)
            else:
                x=df1[i].mean()
                df1[i]=df1[i].fillna(x)
In [9]:
df1.isna().sum()
Out[9]:
Id               0
MSSubClass       0
MSZoning         0
LotFrontage      0
LotArea          0
                ..
MoSold           0
YrSold           0
SaleType         0
SaleCondition    0
SalePrice        0
Length: 81, dtype: int64

Droping irrelevant columns¶

Seperating Input & Output features (X & Y)¶

In [10]:
X=df1.drop(labels=['SalePrice','Id','LowQualFinSF','MiscVal'],axis=1)
Y=df1['SalePrice']
In [11]:
X.shape
Out[11]:
(1460, 77)
In [12]:
Y.shape
Out[12]:
(1460,)

Seperating Categorical & Continuous columns¶

In [13]:
cat=[]
con=[]
for i in X.columns:
    if X[i].dtypes=='object':
        cat.append(i)
    else:
        con.append(i)
print(cat)
print(con)
['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 'SaleCondition']
['MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MoSold', 'YrSold']
In [14]:
cat=['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 
     'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 
     'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 
     'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 
     'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 'SaleCondition']
con=['MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 
     'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'GrLivArea', 'BsmtFullBath', 
     'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 
     'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 
     'MoSold', 'YrSold']

Analysis¶

Univariant Analysis¶

In [15]:
for i in df1.columns:
    if df1[i].dtypes=='object':
        df1[i].value_counts().plot(kind='bar')
        plt.show()
    else:
        sb.histplot(data=df1,x=df1[i],kde=True)
        plt.show()

Bivariant Analysis¶

In [16]:
for i in df1.columns:
    if df1[i].dtypes=='object':
        sb.boxplot(data=df1,x=df1[i],y='SalePrice')
        plt.show()
    else:
        plt.scatter(data=df1,x=df1[i],y='SalePrice')
        plt.xlabel(i)
        plt.ylabel('SalePrice')
        plt.title(f'{i} vs SalePrice')
        plt.show()

Multivarient Analysis¶

In [17]:
a=df1.corr()
C:\Users\Krish\AppData\Local\Temp\ipykernel_19172\1171384326.py:1: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.
  a=df1.corr()
In [18]:
a
Out[18]:
Id MSSubClass LotFrontage LotArea OverallQual OverallCond YearBuilt YearRemodAdd MasVnrArea BsmtFinSF1 ... WoodDeckSF OpenPorchSF EnclosedPorch 3SsnPorch ScreenPorch PoolArea MiscVal MoSold YrSold SalePrice
Id 1.000000 0.011156 -0.009601 -0.033226 -0.028365 0.012609 -0.012713 -0.021998 -0.050199 -0.005024 ... -0.029643 -0.000477 0.002889 -0.046635 0.001330 0.057044 -0.006242 0.021172 0.000712 -0.021917
MSSubClass 0.011156 1.000000 -0.357056 -0.139781 0.032628 -0.059316 0.027850 0.040581 0.022895 -0.069836 ... -0.012579 -0.006100 -0.012037 -0.043825 -0.026030 0.008283 -0.007683 -0.013585 -0.021407 -0.084284
LotFrontage -0.009601 -0.357056 1.000000 0.306795 0.234196 -0.052820 0.117598 0.082746 0.179283 0.215828 ... 0.077106 0.137454 0.009790 0.062335 0.037684 0.180868 0.001168 0.010158 0.006768 0.334901
LotArea -0.033226 -0.139781 0.306795 1.000000 0.105806 -0.005636 0.014228 0.013788 0.103960 0.214103 ... 0.171698 0.084774 -0.018340 0.020423 0.043160 0.077672 0.038068 0.001205 -0.014261 0.263843
OverallQual -0.028365 0.032628 0.234196 0.105806 1.000000 -0.091932 0.572323 0.550684 0.410238 0.239666 ... 0.238923 0.308819 -0.113937 0.030371 0.064886 0.065166 -0.031406 0.070815 -0.027347 0.790982
OverallCond 0.012609 -0.059316 -0.052820 -0.005636 -0.091932 1.000000 -0.375983 0.073741 -0.127788 -0.046231 ... -0.003334 -0.032589 0.070356 0.025504 0.054811 -0.001985 0.068777 -0.003511 0.043950 -0.077856
YearBuilt -0.012713 0.027850 0.117598 0.014228 0.572323 -0.375983 1.000000 0.592855 0.314745 0.249503 ... 0.224880 0.188686 -0.387268 0.031355 -0.050364 0.004950 -0.034383 0.012398 -0.013618 0.522897
YearRemodAdd -0.021998 0.040581 0.082746 0.013788 0.550684 0.073741 0.592855 1.000000 0.179186 0.128451 ... 0.205726 0.226298 -0.193919 0.045286 -0.038740 0.005829 -0.010286 0.021490 0.035743 0.507101
MasVnrArea -0.050199 0.022895 0.179283 0.103960 0.410238 -0.127788 0.314745 0.179186 1.000000 0.263582 ... 0.159349 0.124965 -0.109849 0.018795 0.061453 0.011723 -0.029815 -0.005940 -0.008184 0.475241
BsmtFinSF1 -0.005024 -0.069836 0.215828 0.214103 0.239666 -0.046231 0.249503 0.128451 0.263582 1.000000 ... 0.204306 0.111761 -0.102303 0.026451 0.062021 0.140491 0.003571 -0.015727 0.014359 0.386420
BsmtFinSF2 -0.005968 -0.065649 0.043340 0.111170 -0.059119 0.040229 -0.049107 -0.067759 -0.072302 -0.050117 ... 0.067898 0.003093 0.036543 -0.029993 0.088871 0.041709 0.004940 -0.015211 0.031706 -0.011378
BsmtUnfSF -0.007940 -0.140759 0.122156 -0.002618 0.308159 -0.136841 0.149040 0.181133 0.114184 -0.495251 ... -0.005316 0.129005 -0.002538 0.020764 -0.012579 -0.035092 -0.023837 0.034888 -0.041258 0.214479
TotalBsmtSF -0.015415 -0.238518 0.363358 0.260833 0.537808 -0.171098 0.391452 0.291066 0.362452 0.522396 ... 0.232019 0.247264 -0.095478 0.037384 0.084489 0.126053 -0.018479 0.013196 -0.014969 0.613581
1stFlrSF 0.010496 -0.251758 0.414266 0.299475 0.476224 -0.144203 0.281986 0.240379 0.342160 0.445863 ... 0.235459 0.211671 -0.065292 0.056104 0.088758 0.131525 -0.021096 0.031372 -0.013604 0.605852
2ndFlrSF 0.005590 0.307886 0.072483 0.050986 0.295493 0.028942 0.010308 0.140024 0.174019 -0.137079 ... 0.092165 0.208026 0.061989 -0.024358 0.040606 0.081487 0.016197 0.035164 -0.028700 0.319334
LowQualFinSF -0.044230 0.046474 0.036849 0.004779 -0.030429 0.025494 -0.183784 -0.062419 -0.069068 -0.064503 ... -0.025444 0.018251 0.061081 -0.004296 0.026799 0.062157 -0.003793 -0.022174 -0.028921 -0.025606
GrLivArea 0.008273 0.074853 0.368392 0.263116 0.593007 -0.079686 0.199010 0.287389 0.389893 0.208171 ... 0.247433 0.330224 0.009113 0.020643 0.101510 0.170205 -0.002416 0.050240 -0.036526 0.708624
BsmtFullBath 0.002289 0.003491 0.091481 0.158155 0.111098 -0.054942 0.187599 0.119470 0.085055 0.649212 ... 0.175315 0.067341 -0.049911 -0.000106 0.023148 0.067616 -0.023047 -0.025361 0.067049 0.227122
BsmtHalfBath -0.020155 -0.002333 -0.006419 0.048046 -0.040150 0.117821 -0.038162 -0.012337 0.026669 0.067418 ... 0.040161 -0.025324 -0.008555 0.035114 0.032121 0.020025 -0.007367 0.032873 -0.046524 -0.016844
FullBath 0.005587 0.131608 0.180424 0.126031 0.550600 -0.194149 0.468271 0.439046 0.275730 0.058543 ... 0.187703 0.259977 -0.115093 0.035353 -0.008106 0.049604 -0.014290 0.055872 -0.019669 0.560664
HalfBath 0.006784 0.177354 0.048258 0.014259 0.273458 -0.060769 0.242656 0.183331 0.200802 0.004262 ... 0.108080 0.199740 -0.095317 -0.004972 0.072426 0.022381 0.001290 -0.009050 -0.010269 0.284108
BedroomAbvGr 0.037719 -0.023438 0.237023 0.119690 0.101676 0.012980 -0.070651 -0.040581 0.102417 -0.107355 ... 0.046854 0.093810 0.041570 -0.024478 0.044300 0.070703 0.007767 0.046544 -0.036014 0.168213
KitchenAbvGr 0.002951 0.281721 -0.005805 -0.017784 -0.183882 -0.087001 -0.174800 -0.149598 -0.037364 -0.081007 ... -0.090130 -0.070091 0.037312 -0.024600 -0.051613 -0.014525 0.062341 0.026589 0.031687 -0.135907
TotRmsAbvGrd 0.027239 0.040380 0.320146 0.190015 0.427452 -0.057583 0.095589 0.191740 0.280027 0.044316 ... 0.165984 0.234192 0.004151 -0.006683 0.059383 0.083757 0.024763 0.036907 -0.034516 0.533723
Fireplaces -0.019772 -0.045569 0.235755 0.271364 0.396765 -0.023820 0.147716 0.112581 0.247906 0.260011 ... 0.200019 0.169405 -0.024822 0.011257 0.184530 0.095074 0.001409 0.046357 -0.024096 0.466929
GarageYrBlt 0.000070 0.080187 0.064324 -0.024812 0.518018 -0.306169 0.780555 0.618130 0.249367 0.150338 ... 0.220623 0.218490 -0.285882 0.023534 -0.075256 -0.014499 -0.031853 0.005173 -0.000987 0.470177
GarageCars 0.016570 -0.040110 0.269729 0.154871 0.600671 -0.185758 0.537850 0.420622 0.363778 0.224054 ... 0.226342 0.213569 -0.151434 0.035765 0.050494 0.020934 -0.043080 0.040522 -0.039117 0.640409
GarageArea 0.017634 -0.098672 0.323663 0.180403 0.562022 -0.151521 0.478954 0.371600 0.372567 0.296970 ... 0.224666 0.241435 -0.121777 0.035087 0.051412 0.061047 -0.027400 0.027974 -0.027378 0.623431
WoodDeckSF -0.029643 -0.012579 0.077106 0.171698 0.238923 -0.003334 0.224880 0.205726 0.159349 0.204306 ... 1.000000 0.058661 -0.125989 -0.032771 -0.074181 0.073378 -0.009551 0.021011 0.022270 0.324413
OpenPorchSF -0.000477 -0.006100 0.137454 0.084774 0.308819 -0.032589 0.188686 0.226298 0.124965 0.111761 ... 0.058661 1.000000 -0.093079 -0.005842 0.074304 0.060762 -0.018584 0.071255 -0.057619 0.315856
EnclosedPorch 0.002889 -0.012037 0.009790 -0.018340 -0.113937 0.070356 -0.387268 -0.193919 -0.109849 -0.102303 ... -0.125989 -0.093079 1.000000 -0.037305 -0.082864 0.054203 0.018361 -0.028887 -0.009916 -0.128578
3SsnPorch -0.046635 -0.043825 0.062335 0.020423 0.030371 0.025504 0.031355 0.045286 0.018795 0.026451 ... -0.032771 -0.005842 -0.037305 1.000000 -0.031436 -0.007992 0.000354 0.029474 0.018645 0.044584
ScreenPorch 0.001330 -0.026030 0.037684 0.043160 0.064886 0.054811 -0.050364 -0.038740 0.061453 0.062021 ... -0.074181 0.074304 -0.082864 -0.031436 1.000000 0.051307 0.031946 0.023217 0.010694 0.111447
PoolArea 0.057044 0.008283 0.180868 0.077672 0.065166 -0.001985 0.004950 0.005829 0.011723 0.140491 ... 0.073378 0.060762 0.054203 -0.007992 0.051307 1.000000 0.029669 -0.033737 -0.059689 0.092404
MiscVal -0.006242 -0.007683 0.001168 0.038068 -0.031406 0.068777 -0.034383 -0.010286 -0.029815 0.003571 ... -0.009551 -0.018584 0.018361 0.000354 0.031946 0.029669 1.000000 -0.006495 0.004906 -0.021190
MoSold 0.021172 -0.013585 0.010158 0.001205 0.070815 -0.003511 0.012398 0.021490 -0.005940 -0.015727 ... 0.021011 0.071255 -0.028887 0.029474 0.023217 -0.033737 -0.006495 1.000000 -0.145721 0.046432
YrSold 0.000712 -0.021407 0.006768 -0.014261 -0.027347 0.043950 -0.013618 0.035743 -0.008184 0.014359 ... 0.022270 -0.057619 -0.009916 0.018645 0.010694 -0.059689 0.004906 -0.145721 1.000000 -0.028923
SalePrice -0.021917 -0.084284 0.334901 0.263843 0.790982 -0.077856 0.522897 0.507101 0.475241 0.386420 ... 0.324413 0.315856 -0.128578 0.044584 0.111447 0.092404 -0.021190 0.046432 -0.028923 1.000000

38 rows × 38 columns

In [19]:
sb.heatmap(a)
Out[19]:
<Axes: >

Standardisation of Continuous columns (Normalisation)¶

Standardisation¶

In [20]:
from sklearn.preprocessing import StandardScaler
In [21]:
ss=StandardScaler()
In [22]:
X1=pd.DataFrame(ss.fit_transform(X[con]),columns=con)
In [23]:
X1
Out[23]:
MSSubClass LotFrontage LotArea OverallQual OverallCond YearBuilt YearRemodAdd MasVnrArea BsmtFinSF1 BsmtFinSF2 ... GarageCars GarageArea WoodDeckSF OpenPorchSF EnclosedPorch 3SsnPorch ScreenPorch PoolArea MoSold YrSold
0 0.073375 -0.229372 -0.207142 0.651479 -0.517200 1.050994 0.878668 0.511418 0.575425 -0.288653 ... 0.311725 0.351000 -0.752176 0.216503 -0.359325 -0.116339 -0.270208 -0.068692 -1.599111 0.138777
1 -0.872563 0.451936 -0.091886 -0.071836 2.179628 0.156734 -0.429577 -0.574410 1.171992 -0.288653 ... 0.311725 -0.060731 1.626195 -0.704483 -0.359325 -0.116339 -0.270208 -0.068692 -0.489110 -0.614439
2 0.073375 -0.093110 0.073480 0.651479 -0.517200 0.984752 0.830215 0.323060 0.092907 -0.288653 ... 0.311725 0.631726 -0.752176 -0.070361 -0.359325 -0.116339 -0.270208 -0.068692 0.990891 0.138777
3 0.309859 -0.456474 -0.096897 0.651479 -0.517200 -1.863632 -0.720298 -0.574410 -0.499274 -0.288653 ... 1.650307 0.790804 -0.752176 -0.176048 4.092524 -0.116339 -0.270208 -0.068692 -1.599111 -1.367655
4 0.073375 0.633618 0.375148 1.374795 -0.517200 0.951632 0.733308 1.364570 0.463568 -0.288653 ... 1.650307 1.698485 0.780197 0.563760 -0.359325 -0.116339 -0.270208 -0.068692 2.100892 0.138777
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1455 0.073375 -0.365633 -0.260560 -0.071836 -0.517200 0.918511 0.733308 -0.574410 -0.973018 -0.288653 ... 0.311725 -0.060731 -0.752176 -0.100558 -0.359325 -0.116339 -0.270208 -0.068692 0.620891 -0.614439
1456 -0.872563 0.679039 0.266407 -0.071836 0.381743 0.222975 0.151865 0.084843 0.759659 0.722112 ... 0.311725 0.126420 2.033231 -0.704483 -0.359325 -0.116339 -0.270208 -0.068692 -1.599111 1.645210
1457 0.309859 -0.183951 -0.147810 0.651479 3.078570 -1.002492 1.024029 -0.574410 -0.369871 -0.288653 ... -1.026858 -1.033914 -0.752176 0.201405 -0.359325 -0.116339 -0.270208 -0.068692 -0.489110 1.645210
1458 -0.872563 -0.093110 -0.080160 -0.795151 0.381743 -0.704406 0.539493 -0.574410 -0.865548 6.092188 ... -1.026858 -1.090059 2.168910 -0.704483 1.473789 -0.116339 -0.270208 -0.068692 -0.859110 1.645210
1459 -0.872563 0.224833 -0.058112 -0.795151 0.381743 -0.207594 -0.962566 -0.574410 0.847389 1.509640 ... -1.026858 -0.921624 5.121921 0.322190 -0.359325 -0.116339 -0.270208 -0.068692 -0.119110 0.138777

1460 rows × 34 columns

Encoding (converting Categorical cloumns in continous)¶

OneHot Encoding¶

In [24]:
X2=pd.get_dummies(X[cat])
In [25]:
X2
Out[25]:
MSZoning_C (all) MSZoning_FV MSZoning_RH MSZoning_RL MSZoning_RM Street_Grvl Street_Pave Alley_Grvl Alley_Pave LotShape_IR1 ... SaleType_ConLw SaleType_New SaleType_Oth SaleType_WD SaleCondition_Abnorml SaleCondition_AdjLand SaleCondition_Alloca SaleCondition_Family SaleCondition_Normal SaleCondition_Partial
0 0 0 0 1 0 0 1 1 0 0 ... 0 0 0 1 0 0 0 0 1 0
1 0 0 0 1 0 0 1 1 0 0 ... 0 0 0 1 0 0 0 0 1 0
2 0 0 0 1 0 0 1 1 0 1 ... 0 0 0 1 0 0 0 0 1 0
3 0 0 0 1 0 0 1 1 0 1 ... 0 0 0 1 1 0 0 0 0 0
4 0 0 0 1 0 0 1 1 0 1 ... 0 0 0 1 0 0 0 0 1 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1455 0 0 0 1 0 0 1 1 0 0 ... 0 0 0 1 0 0 0 0 1 0
1456 0 0 0 1 0 0 1 1 0 0 ... 0 0 0 1 0 0 0 0 1 0
1457 0 0 0 1 0 0 1 1 0 0 ... 0 0 0 1 0 0 0 0 1 0
1458 0 0 0 1 0 0 1 1 0 0 ... 0 0 0 1 0 0 0 0 1 0
1459 0 0 0 1 0 0 1 1 0 0 ... 0 0 0 1 0 0 0 0 1 0

1460 rows × 252 columns

Detecting & Treating Outliers¶

In [26]:
out=[]
for i in con:
    out.extend((X1[(X1[i]>3) | (X1[i]<-3)]).index)
print(out)
[9, 48, 93, 125, 165, 246, 291, 300, 312, 335, 411, 488, 520, 535, 635, 637, 703, 705, 713, 861, 969, 985, 1030, 1062, 1144, 1186, 1190, 1266, 1393, 1416, 171, 197, 231, 277, 313, 446, 807, 909, 934, 1107, 1127, 1173, 1182, 1211, 1298, 1337, 53, 249, 313, 335, 384, 451, 457, 661, 706, 769, 848, 1298, 1396, 375, 533, 88, 185, 191, 218, 241, 250, 304, 375, 378, 398, 461, 508, 519, 583, 676, 703, 726, 745, 980, 991, 1123, 1149, 1213, 1268, 1327, 1352, 1435, 1457, 304, 630, 747, 1132, 1137, 1349, 37, 58, 70, 105, 115, 161, 169, 178, 224, 297, 349, 403, 477, 517, 523, 654, 691, 718, 755, 763, 798, 808, 825, 898, 981, 1111, 1169, 1228, 1289, 1298, 1373, 1417, 70, 178, 523, 898, 1182, 1298, 24, 52, 113, 116, 153, 166, 233, 253, 260, 263, 271, 273, 313, 322, 355, 414, 440, 446, 470, 493, 542, 548, 577, 586, 599, 666, 697, 764, 785, 828, 842, 854, 888, 918, 923, 924, 1040, 1059, 1077, 1152, 1220, 1253, 1299, 1308, 1320, 1369, 1387, 1418, 1445, 1458, 137, 224, 278, 477, 496, 581, 678, 774, 798, 932, 1267, 224, 332, 440, 496, 523, 691, 1044, 1182, 1298, 1373, 224, 440, 496, 523, 529, 691, 898, 1024, 1044, 1182, 1298, 1373, 304, 691, 1169, 1182, 118, 185, 197, 304, 496, 523, 608, 635, 691, 769, 798, 1169, 1182, 1268, 1298, 1353, 53, 188, 313, 326, 335, 420, 454, 588, 634, 738, 807, 921, 942, 1163, 1270, 1298, 1, 26, 33, 37, 41, 50, 93, 116, 129, 176, 197, 201, 213, 215, 218, 245, 249, 251, 253, 298, 299, 314, 330, 352, 358, 367, 414, 421, 426, 499, 504, 558, 574, 576, 580, 597, 611, 628, 633, 658, 691, 697, 717, 741, 743, 745, 814, 828, 892, 920, 925, 931, 944, 952, 953, 954, 1006, 1029, 1041, 1047, 1052, 1055, 1069, 1072, 1076, 1080, 1103, 1118, 1123, 1149, 1156, 1181, 1213, 1225, 1276, 1287, 1327, 1335, 1350, 1389, 1405, 1415, 188, 298, 597, 624, 628, 921, 1154, 1163, 1230, 1283, 1350, 1450, 53, 144, 189, 291, 330, 570, 634, 635, 843, 897, 1163, 1213, 1270, 1350, 8, 9, 17, 39, 48, 74, 78, 93, 102, 137, 144, 165, 188, 246, 330, 342, 420, 441, 454, 488, 505, 520, 529, 570, 634, 635, 637, 676, 703, 705, 728, 736, 778, 809, 843, 886, 894, 897, 910, 913, 921, 940, 942, 943, 954, 955, 984, 1003, 1011, 1030, 1062, 1090, 1163, 1186, 1216, 1230, 1232, 1266, 1275, 1283, 1292, 1336, 1350, 1391, 1393, 1412, 1416, 1450, 185, 635, 769, 803, 897, 910, 1031, 1173, 1230, 1298, 1350, 1386, 166, 309, 605, 642, 1298, 93, 653, 178, 581, 664, 825, 1061, 1190, 1298, 53, 64, 166, 169, 335, 343, 357, 480, 661, 769, 828, 848, 893, 961, 974, 1044, 1068, 1210, 1312, 1313, 1423, 1459, 28, 185, 293, 495, 499, 523, 583, 591, 645, 664, 666, 713, 735, 775, 784, 807, 854, 875, 947, 961, 996, 1184, 1193, 1292, 1298, 1328, 1369, 3, 7, 154, 197, 260, 306, 314, 325, 328, 358, 365, 380, 459, 462, 496, 520, 577, 630, 648, 653, 660, 662, 718, 720, 747, 799, 813, 836, 840, 918, 939, 945, 1013, 1030, 1081, 1119, 1139, 1150, 1152, 1185, 1197, 1202, 1248, 1266, 1326, 1360, 1382, 1393, 1419, 1439, 1445, 5, 55, 120, 129, 159, 182, 187, 205, 237, 258, 280, 546, 704, 726, 744, 889, 941, 1080, 1156, 1161, 1181, 1346, 1437, 46, 72, 80, 104, 176, 185, 189, 196, 289, 297, 312, 339, 351, 359, 360, 366, 400, 426, 471, 475, 550, 605, 618, 625, 647, 673, 764, 769, 785, 795, 803, 828, 830, 854, 859, 887, 888, 907, 919, 944, 1037, 1055, 1067, 1070, 1154, 1171, 1184, 1228, 1282, 1293, 1301, 1320, 1328, 1386, 1414, 197, 810, 1170, 1182, 1298, 1386, 1423]
In [27]:
import numpy as np
In [28]:
outliers=np.unique(out)

Droping Outliers from X & Y¶

In [29]:
X1.drop(index=outliers,axis=0,inplace=True)
X2.drop(index=outliers,axis=0,inplace=True)
Y.drop(index=outliers,axis=0,inplace=True)
In [30]:
X1.shape
Out[30]:
(1036, 34)
In [31]:
X2.shape
Out[31]:
(1036, 252)
In [32]:
Y.shape
Out[32]:
(1036,)

Joining X[Cat] & X[Con]¶

In [33]:
Xnew=X1.join(X2)
In [34]:
Xnew.shape
Out[34]:
(1036, 286)

Reindexing X & Y¶

In [35]:
Xnew.index=range(0,1036)
Y.index=range(0,1036)
In [36]:
Xnew.head()
Out[36]:
MSSubClass LotFrontage LotArea OverallQual OverallCond YearBuilt YearRemodAdd MasVnrArea BsmtFinSF1 BsmtFinSF2 ... SaleType_ConLw SaleType_New SaleType_Oth SaleType_WD SaleCondition_Abnorml SaleCondition_AdjLand SaleCondition_Alloca SaleCondition_Family SaleCondition_Normal SaleCondition_Partial
0 0.073375 -0.229372 -0.207142 0.651479 -0.5172 1.050994 0.878668 0.511418 0.575425 -0.288653 ... 0 0 0 1 0 0 0 0 1 0
1 0.073375 -0.093110 0.073480 0.651479 -0.5172 0.984752 0.830215 0.323060 0.092907 -0.288653 ... 0 0 0 1 0 0 0 0 1 0
2 0.073375 0.633618 0.375148 1.374795 -0.5172 0.951632 0.733308 1.364570 0.463568 -0.288653 ... 0 0 0 1 0 0 0 0 1 0
3 -0.872563 0.224833 -0.043379 1.374795 -0.5172 1.084115 0.975575 0.456019 2.029558 -0.288653 ... 0 0 0 1 0 0 0 0 1 0
4 -0.872563 -0.002269 0.068469 -0.795151 -0.5172 -0.207594 -0.962566 -0.574410 1.014077 -0.288653 ... 0 0 0 1 0 0 0 0 1 0

5 rows × 286 columns

In [37]:
Xnew.columns
Out[37]:
Index(['MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond',
       'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2',
       ...
       'SaleType_ConLw', 'SaleType_New', 'SaleType_Oth', 'SaleType_WD',
       'SaleCondition_Abnorml', 'SaleCondition_AdjLand',
       'SaleCondition_Alloca', 'SaleCondition_Family', 'SaleCondition_Normal',
       'SaleCondition_Partial'],
      dtype='object', length=286)

Spliting training & testing set¶

In [38]:
from sklearn.model_selection import train_test_split
In [39]:
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
In [40]:
xtrain.shape
Out[40]:
(828, 286)
In [41]:
xtrain.columns
Out[41]:
Index(['MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond',
       'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2',
       ...
       'SaleType_ConLw', 'SaleType_New', 'SaleType_Oth', 'SaleType_WD',
       'SaleCondition_Abnorml', 'SaleCondition_AdjLand',
       'SaleCondition_Alloca', 'SaleCondition_Family', 'SaleCondition_Normal',
       'SaleCondition_Partial'],
      dtype='object', length=286)
In [42]:
ytrain.shape
Out[42]:
(828,)

Feature Selection¶

Backward Elimination¶

In [43]:
from statsmodels.api import OLS,add_constant
In [44]:
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
In [45]:
rsq=round(model.rsquared_adj,4)
In [46]:
rsq
Out[46]:
0.9332
In [47]:
model.pvalues.sort_values()
Out[47]:
Neighborhood_StoneBr    3.601855e-15
BsmtExposure_Gd         2.906979e-12
BsmtFinSF1              6.080131e-09
OverallQual             4.106494e-08
TotalBsmtSF             4.479796e-08
                            ...     
GarageQual_Po                    NaN
PoolQC_Ex                        NaN
PoolQC_Fa                        NaN
MiscFeature_TenC                 NaN
SaleType_CWD                     NaN
Length: 286, dtype: float64
In [48]:
col_to_drop=model.pvalues.sort_values().index[-1]
In [49]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9332
column to drop MiscFeature_TenC
In [50]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9332
column to drop PoolQC_Fa
In [51]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9332
column to drop PoolQC_Ex
In [52]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9332
column to drop GarageQual_Po
In [53]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9332
column to drop GarageType_2Types
In [54]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9332
column to drop Functional_Sev
In [55]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9332
column to drop Electrical_Mix
In [56]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9332
column to drop HeatingQC_Po
In [57]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9332
column to drop Heating_OthW
In [58]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9332
column to drop KitchenQual_Fa
In [59]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9332
column to drop HouseStyle_1.5Fin
In [60]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9332
column to drop MiscFeature_Gar2
In [61]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9332
column to drop MiscFeature_Shed
In [62]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9333
column to drop BsmtFinType1_BLQ
In [63]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9333
column to drop BsmtExposure_Av
In [64]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9333
column to drop Condition2_RRAn
In [65]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9333
column to drop LandSlope_Gtl
In [66]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9333
column to drop RoofStyle_Flat
In [67]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9333
column to drop RoofMatl_Metal
In [68]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9333
column to drop RoofMatl_Tar&Grv
Adjusted r_squared 0.9333
column to drop Exterior2nd_CmentBd
In [69]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9333
column to drop Exterior1st_CemntBd
In [70]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9333
column to drop Exterior2nd_MetalSd
In [71]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9333
column to drop Exterior2nd_Stone
In [72]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9334
column to drop Exterior2nd_Wd Shng
In [73]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9335
column to drop Utilities_NoSeWa
In [74]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9335
column to drop Electrical_FuseF
In [75]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9335
column to drop RoofMatl_Roll
In [76]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9335
column to drop Condition1_RRAn
In [77]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9335
column to drop Condition2_PosA
In [78]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9335
column to drop Heating_Floor
In [79]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9335
column to drop Heating_GasW
In [80]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9336
column to drop Foundation_Wood
In [81]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9336
column to drop Foundation_CBlock
In [82]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9337
column to drop Condition2_RRAe
In [83]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9337
column to drop ExterCond_Ex
In [84]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9337
column to drop Exterior1st_Stone
In [85]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9337
column to drop YrSold
In [86]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9338
column to drop BsmtCond_Po
In [87]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9338
column to drop Exterior1st_AsphShn
In [88]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9338
column to drop Exterior2nd_HdBoard
In [89]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9339
column to drop Exterior2nd_Other
In [90]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9339
column to drop Exterior2nd_Plywood
In [91]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.934
column to drop Condition2_Artery
In [92]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.934
column to drop BldgType_2fmCon
In [93]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.934
column to drop MasVnrType_BrkFace
In [94]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.934
column to drop Street_Grvl
In [95]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.934
column to drop SaleType_COD
In [96]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.934
column to drop RoofMatl_ClyTile
In [97]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.934
column to drop RoofMatl_Membran
In [98]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.934
column to drop GarageCond_TA
In [99]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.934
column to drop RoofMatl_WdShngl
In [100]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.934
column to drop MSZoning_RH
In [101]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.934
column to drop Condition2_PosN
In [102]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.934
column to drop GarageCond_Gd
In [103]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9341
column to drop ExterCond_Fa
In [104]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9341
column to drop RoofStyle_Gambrel
In [105]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9341
column to drop HouseStyle_2Story
In [106]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9342
column to drop TotRmsAbvGrd
In [107]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9343
column to drop Foundation_PConc
In [108]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9344
column to drop RoofStyle_Shed
In [109]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9344
column to drop KitchenQual_TA
In [110]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9345
column to drop Exterior2nd_BrkFace
In [111]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9346
column to drop Exterior2nd_Brk Cmn
In [112]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9347
column to drop Functional_Maj1
In [113]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9347
column to drop Condition2_Feedr
In [114]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9347
column to drop Condition2_RRNn
In [115]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9348
column to drop ExterCond_Po
In [116]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9348
column to drop SaleType_ConLI
In [117]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9349
column to drop BsmtFinType2_Rec
In [118]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9349
column to drop BsmtFinType2_BLQ
In [119]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.935
column to drop Condition1_RRNe
In [120]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9351
column to drop Electrical_FuseP
In [121]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9352
column to drop EnclosedPorch
In [122]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9353
column to drop Exterior1st_CBlock
In [123]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9353
column to drop Exterior2nd_CBlock
In [124]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9354
column to drop MasVnrType_BrkCmn
In [125]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9355
column to drop Neighborhood_SawyerW
In [126]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9355
column to drop Neighborhood_Somerst
In [127]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9356
column to drop Exterior2nd_AsphShn
In [128]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9356
column to drop Neighborhood_Blueste
In [129]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9357
column to drop KitchenQual_Gd
In [130]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9358
column to drop BsmtFinType1_Rec
In [131]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9359
column to drop Exterior1st_BrkFace
In [132]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.936
column to drop ScreenPorch
In [133]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9361
column to drop BsmtQual_TA
In [134]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9361
column to drop GarageType_Detchd
In [135]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9361
column to drop GarageType_BuiltIn
In [136]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9362
column to drop GarageType_Basment
In [137]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9363
column to drop BldgType_Twnhs
In [138]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9364
column to drop BsmtCond_Fa
In [139]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9364
column to drop Street_Pave
In [140]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9364
column to drop MasVnrType_None
In [141]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9365
column to drop BsmtFullBath
In [142]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9366
column to drop BsmtUnfSF
In [143]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9366
column to drop RoofStyle_Hip
In [144]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9367
column to drop ExterCond_Gd
In [145]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9368
column to drop Neighborhood_Veenker
In [146]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9369
column to drop Condition1_Feedr
In [147]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.937
column to drop Exterior2nd_AsbShng
In [148]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.937
column to drop Neighborhood_BrkSide
In [149]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9371
column to drop Neighborhood_MeadowV
In [150]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9372
column to drop SaleType_ConLD
In [151]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9373
column to drop GarageType_CarPort
In [152]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9374
column to drop MoSold
In [153]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9375
column to drop MSZoning_RL
In [154]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9375
column to drop Neighborhood_ClearCr
In [155]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9376
column to drop Electrical_SBrkr
In [156]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9377
column to drop Condition1_RRNn
In [157]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9378
column to drop LotConfig_FR3
In [158]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9378
column to drop LotConfig_FR2
In [159]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9379
column to drop Condition2_Norm
In [160]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9379
column to drop Fence_MnWw
In [161]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9379
column to drop SaleType_ConLw
In [162]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.938
column to drop SaleType_Con
In [163]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9381
column to drop Functional_Maj2
In [164]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9382
column to drop Functional_Min1
In [165]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9382
column to drop PavedDrive_P
In [166]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9382
column to drop Functional_Min2
In [167]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9383
column to drop LandSlope_Mod
In [168]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9384
column to drop LotShape_IR3
In [169]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9384
column to drop RoofMatl_WdShake
In [170]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9385
column to drop SaleType_WD
In [171]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9385
column to drop GarageYrBlt
In [172]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9386
column to drop GarageCond_Ex
In [173]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9386
column to drop GarageQual_Ex
In [174]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9386
column to drop GarageQual_TA
In [175]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9387
column to drop Neighborhood_SWISU
In [176]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9387
column to drop BsmtFinType2_LwQ
In [177]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9388
column to drop Neighborhood_IDOTRR
In [178]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9388
column to drop Heating_Grav
In [179]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9389
column to drop Heating_GasA
In [180]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.939
column to drop Exterior1st_MetalSd
In [181]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.939
column to drop BsmtFinType1_Unf
In [182]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9391
column to drop BsmtFinType1_ALQ
In [183]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9391
column to drop GarageCond_Fa
In [184]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9392
column to drop RoofStyle_Gable
In [185]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9393
column to drop MiscFeature_Othr
In [186]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9393
column to drop SaleCondition_Normal
In [187]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9393
column to drop Fence_GdPrv
In [188]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9393
column to drop FireplaceQu_Fa
In [189]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9393
column to drop LandSlope_Sev
In [190]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9394
column to drop BsmtFinType2_ALQ
In [191]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9394
column to drop Condition1_PosN
In [192]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9395
column to drop Exterior1st_BrkComm
In [193]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9395
column to drop ExterQual_TA
In [194]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9395
column to drop ExterQual_Gd
In [195]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9396
column to drop SaleType_Oth
In [196]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9396
column to drop Exterior1st_AsbShng
In [197]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9396
column to drop Neighborhood_Sawyer
In [198]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9397
column to drop Neighborhood_Gilbert
In [199]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9397
column to drop Neighborhood_CollgCr
In [200]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9398
column to drop Neighborhood_Timber
In [201]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9398
column to drop GarageCars
In [202]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9398
column to drop GarageQual_Gd
In [203]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9399
column to drop Neighborhood_NAmes
In [204]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9399
column to drop BsmtQual_Gd
In [205]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9399
column to drop RoofStyle_Mansard
In [206]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9399
column to drop Neighborhood_Mitchel
In [207]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.94
column to drop Condition1_Norm
In [208]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.94
column to drop GrLivArea
In [209]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.94
column to drop HouseStyle_2.5Fin
In [210]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9401
column to drop HouseStyle_1Story
In [211]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9401
column to drop GarageCond_Po
In [212]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9401
column to drop FireplaceQu_Ex
In [213]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9401
column to drop FireplaceQu_TA
In [214]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9402
column to drop FireplaceQu_Gd
In [215]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9402
column to drop BldgType_1Fam
In [216]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9402
column to drop OpenPorchSF
In [217]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9402
column to drop LotFrontage
In [218]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9402
column to drop LotShape_IR1
In [219]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9402
column to drop LotShape_Reg
In [220]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9402
column to drop HouseStyle_2.5Unf
In [221]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9402
column to drop SaleCondition_Family
In [222]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9402
column to drop Fence_GdWo
In [223]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9402
column to drop SaleType_New
In [224]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9402
column to drop BsmtFinSF2
In [225]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9402
column to drop BsmtFinType2_Unf
In [226]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9402
column to drop ExterQual_Fa
In [227]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9402
column to drop Exterior1st_ImStucc
In [228]:
Xnew=Xnew.drop(labels=col_to_drop,axis=1)
xtrain,xtest,ytrain,ytest=train_test_split(Xnew,Y,test_size=0.2,random_state=54)
Xconst=add_constant(xtrain)
ols=OLS(ytrain,Xconst)
model=ols.fit()
rsq=round(model.rsquared_adj,4)
col_to_drop=model.pvalues.sort_values().index[-1]
print('Adjusted r_squared',rsq)
print('column to drop',col_to_drop)
Adjusted r_squared 0.9401
column to drop LotShape_IR2
In [229]:
Xnew.columns
Out[229]:
Index(['MSSubClass', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt',
       'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'TotalBsmtSF', '1stFlrSF',
       ...
       'GarageFinish_Unf', 'GarageQual_Fa', 'PavedDrive_N', 'PavedDrive_Y',
       'PoolQC_Gd', 'Fence_MnPrv', 'SaleCondition_Abnorml',
       'SaleCondition_AdjLand', 'SaleCondition_Alloca',
       'SaleCondition_Partial'],
      dtype='object', length=105)
In [230]:
len(Xnew.columns)
Out[230]:
105

Applying linear regression¶

Finding Mean Absolute training & testing error¶

In [231]:
from sklearn.linear_model import LinearRegression
In [232]:
lm=LinearRegression()
In [233]:
model=lm.fit(xtrain,ytrain)
In [234]:
tr_pred=model.predict(xtrain)
In [235]:
ts_pred=model.predict(xtest)
In [236]:
from sklearn.metrics import mean_squared_error,mean_absolute_error
In [237]:
tr_err=mean_squared_error(ytrain,tr_pred)
ts_err=mean_squared_error(ytest,ts_pred)
tr_ab=mean_absolute_error(ytrain,tr_pred)
ts_ab=mean_absolute_error(ytest,ts_pred)
print('train_err',tr_err)
print('test_err',ts_err)
print('train_ab',tr_ab)
print('test_ab',ts_ab)
train_err 245577350.32125604
test_err 466646940.1105769
train_ab 10879.193236714977
test_ab 14506.370192307691

Model is overfitted (testing error > training error)¶

Applying Regularisation Technique¶

By Ridge¶

In [238]:
from sklearn.linear_model import Ridge
In [239]:
rr=Ridge(alpha=0.2)
model=rr.fit(xtrain,ytrain)
tr_pred=model.predict(xtrain)
ts_pred=model.predict(xtest)

tr_ab=mean_absolute_error(ytrain,tr_pred)
ts_ab=mean_absolute_error(ytest,ts_pred)

print('train_ab',tr_ab)
print('test_ab',ts_ab)
train_ab 10879.632101245055
test_ab 14234.733770509745

Storing alpha range in a list for cross validation¶

In [240]:
w=[]
e=0.01
for i in range(0,500,1):
    w.append(e)
    e=round(e+0.01,2)

Cross Validation¶

Grid Search CV¶

In [241]:
rr=Ridge()
tg={'alpha':w}

from sklearn.model_selection import GridSearchCV

cv=GridSearchCV(rr,tg,scoring='neg_mean_absolute_error',cv=4)
cvmodel=cv.fit(Xnew,Y)
In [242]:
cvmodel.best_params_
Out[242]:
{'alpha': 2.36}
In [243]:
rr=Ridge(alpha=2.36)
model=rr.fit(xtrain,ytrain)
tr_pred=model.predict(xtrain)
ts_pred=model.predict(xtest)

tr_ab=mean_absolute_error(ytrain,tr_pred)
ts_ab=mean_absolute_error(ytest,ts_pred)

print('train_ab',tr_ab)
print('test_ab',ts_ab)
train_ab 11105.32108271729
test_ab 13265.776840227652
In [244]:
cvmodel.best_estimator_
Out[244]:
Ridge(alpha=2.36)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Ridge(alpha=2.36)

Reading Testing data set¶

In [245]:
df2=pd.read_csv('testing_set.csv')
In [246]:
df2
Out[246]:
Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape LandContour Utilities ... ScreenPorch PoolArea PoolQC Fence MiscFeature MiscVal MoSold YrSold SaleType SaleCondition
0 1461 20 RH 80.0 11622 Pave NaN Reg Lvl AllPub ... 120 0 NaN MnPrv NaN 0 6 2010 WD Normal
1 1462 20 RL 81.0 14267 Pave NaN IR1 Lvl AllPub ... 0 0 NaN NaN Gar2 12500 6 2010 WD Normal
2 1463 60 RL 74.0 13830 Pave NaN IR1 Lvl AllPub ... 0 0 NaN MnPrv NaN 0 3 2010 WD Normal
3 1464 60 RL 78.0 9978 Pave NaN IR1 Lvl AllPub ... 0 0 NaN NaN NaN 0 6 2010 WD Normal
4 1465 120 RL 43.0 5005 Pave NaN IR1 HLS AllPub ... 144 0 NaN NaN NaN 0 1 2010 WD Normal
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1454 2915 160 RM 21.0 1936 Pave NaN Reg Lvl AllPub ... 0 0 NaN NaN NaN 0 6 2006 WD Normal
1455 2916 160 RM 21.0 1894 Pave NaN Reg Lvl AllPub ... 0 0 NaN NaN NaN 0 4 2006 WD Abnorml
1456 2917 20 RL 160.0 20000 Pave NaN Reg Lvl AllPub ... 0 0 NaN NaN NaN 0 9 2006 WD Abnorml
1457 2918 85 RL 62.0 10441 Pave NaN Reg Lvl AllPub ... 0 0 NaN MnPrv Shed 700 7 2006 WD Normal
1458 2919 60 RL 74.0 9627 Pave NaN Reg Lvl AllPub ... 0 0 NaN NaN NaN 0 11 2006 WD Normal

1459 rows × 80 columns

In [247]:
df2.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1459 entries, 0 to 1458
Data columns (total 80 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Id             1459 non-null   int64  
 1   MSSubClass     1459 non-null   int64  
 2   MSZoning       1455 non-null   object 
 3   LotFrontage    1232 non-null   float64
 4   LotArea        1459 non-null   int64  
 5   Street         1459 non-null   object 
 6   Alley          107 non-null    object 
 7   LotShape       1459 non-null   object 
 8   LandContour    1459 non-null   object 
 9   Utilities      1457 non-null   object 
 10  LotConfig      1459 non-null   object 
 11  LandSlope      1459 non-null   object 
 12  Neighborhood   1459 non-null   object 
 13  Condition1     1459 non-null   object 
 14  Condition2     1459 non-null   object 
 15  BldgType       1459 non-null   object 
 16  HouseStyle     1459 non-null   object 
 17  OverallQual    1459 non-null   int64  
 18  OverallCond    1459 non-null   int64  
 19  YearBuilt      1459 non-null   int64  
 20  YearRemodAdd   1459 non-null   int64  
 21  RoofStyle      1459 non-null   object 
 22  RoofMatl       1459 non-null   object 
 23  Exterior1st    1458 non-null   object 
 24  Exterior2nd    1458 non-null   object 
 25  MasVnrType     1443 non-null   object 
 26  MasVnrArea     1444 non-null   float64
 27  ExterQual      1459 non-null   object 
 28  ExterCond      1459 non-null   object 
 29  Foundation     1459 non-null   object 
 30  BsmtQual       1415 non-null   object 
 31  BsmtCond       1414 non-null   object 
 32  BsmtExposure   1415 non-null   object 
 33  BsmtFinType1   1417 non-null   object 
 34  BsmtFinSF1     1458 non-null   float64
 35  BsmtFinType2   1417 non-null   object 
 36  BsmtFinSF2     1458 non-null   float64
 37  BsmtUnfSF      1458 non-null   float64
 38  TotalBsmtSF    1458 non-null   float64
 39  Heating        1459 non-null   object 
 40  HeatingQC      1459 non-null   object 
 41  CentralAir     1459 non-null   object 
 42  Electrical     1459 non-null   object 
 43  1stFlrSF       1459 non-null   int64  
 44  2ndFlrSF       1459 non-null   int64  
 45  LowQualFinSF   1459 non-null   int64  
 46  GrLivArea      1459 non-null   int64  
 47  BsmtFullBath   1457 non-null   float64
 48  BsmtHalfBath   1457 non-null   float64
 49  FullBath       1459 non-null   int64  
 50  HalfBath       1459 non-null   int64  
 51  BedroomAbvGr   1459 non-null   int64  
 52  KitchenAbvGr   1459 non-null   int64  
 53  KitchenQual    1458 non-null   object 
 54  TotRmsAbvGrd   1459 non-null   int64  
 55  Functional     1457 non-null   object 
 56  Fireplaces     1459 non-null   int64  
 57  FireplaceQu    729 non-null    object 
 58  GarageType     1383 non-null   object 
 59  GarageYrBlt    1381 non-null   float64
 60  GarageFinish   1381 non-null   object 
 61  GarageCars     1458 non-null   float64
 62  GarageArea     1458 non-null   float64
 63  GarageQual     1381 non-null   object 
 64  GarageCond     1381 non-null   object 
 65  PavedDrive     1459 non-null   object 
 66  WoodDeckSF     1459 non-null   int64  
 67  OpenPorchSF    1459 non-null   int64  
 68  EnclosedPorch  1459 non-null   int64  
 69  3SsnPorch      1459 non-null   int64  
 70  ScreenPorch    1459 non-null   int64  
 71  PoolArea       1459 non-null   int64  
 72  PoolQC         3 non-null      object 
 73  Fence          290 non-null    object 
 74  MiscFeature    51 non-null     object 
 75  MiscVal        1459 non-null   int64  
 76  MoSold         1459 non-null   int64  
 77  YrSold         1459 non-null   int64  
 78  SaleType       1458 non-null   object 
 79  SaleCondition  1459 non-null   object 
dtypes: float64(11), int64(26), object(43)
memory usage: 912.0+ KB
In [248]:
df2.shape
Out[248]:
(1459, 80)

Missing Data Treatment in testing dataset (Filling Null values)¶

In [249]:
df2.isna().sum()
Out[249]:
Id                 0
MSSubClass         0
MSZoning           4
LotFrontage      227
LotArea            0
                ... 
MiscVal            0
MoSold             0
YrSold             0
SaleType           1
SaleCondition      0
Length: 80, dtype: int64
In [250]:
for i in df2.columns:
        if (df2[i].isna().sum())>0:
            if df2[i].dtypes=='object':
                x=df2[i].mode()[0]
                df2[i]=df2[i].fillna(x)
            else:
                x=df2[i].mean()
                df2[i]=df2[i].fillna(x)
In [251]:
df2.isna().sum()
Out[251]:
Id               0
MSSubClass       0
MSZoning         0
LotFrontage      0
LotArea          0
                ..
MiscVal          0
MoSold           0
YrSold           0
SaleType         0
SaleCondition    0
Length: 80, dtype: int64

Droping irrelevant columns from testing dataset¶

Seperating Input & Output features from testing dataset (Xts & Yts)¶

In [252]:
Xts=df2.drop(labels=['Id','LowQualFinSF','MiscVal'],axis=1)
In [253]:
Xts.shape
Out[253]:
(1459, 77)

Seperating Categorical & Continuous columns in testing dataset¶

In [254]:
cat_ts=[]
con_ts=[]
for i in Xts.columns:
    if Xts[i].dtypes=='object':
        cat_ts.append(i)
    else:
        con_ts.append(i)
print(cat_ts)
print(con_ts)
['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 'SaleCondition']
['MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MoSold', 'YrSold']
In [255]:
cat_ts=['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 
        'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 
        'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 
        'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 
        'GarageType', 'GarageFinish', 'GarageQual', 'GarageCond', 'PavedDrive', 'PoolQC', 'Fence', 'MiscFeature', 'SaleType', 
        'SaleCondition']
con_ts=['MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 
        'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'GrLivArea', 'BsmtFullBath', 
        'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 
        'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 
        'MoSold', 'YrSold']

Standardisation of Continuous (Normalisation)¶

In [256]:
from sklearn.preprocessing import StandardScaler
ss=StandardScaler()
In [257]:
X1ts=pd.DataFrame(ss.fit_transform(Xts[con_ts]),columns=con_ts)
In [258]:
X1ts
Out[258]:
MSSubClass LotFrontage LotArea OverallQual OverallCond YearBuilt YearRemodAdd MasVnrArea BsmtFinSF1 BsmtFinSF2 ... GarageCars GarageArea WoodDeckSF OpenPorchSF EnclosedPorch 3SsnPorch ScreenPorch PoolArea MoSold YrSold
0 -0.874711 0.555587 0.363929 -0.751101 0.400766 -0.340945 -1.072885 -0.570108 0.063295 0.517348 ... -0.988013 1.185945 0.366678 -0.701628 -0.360738 -0.088827 1.818960 -0.057227 -0.038281 1.713905
1 -0.874711 0.604239 0.897861 -0.054877 0.400766 -0.439695 -1.214908 0.041273 1.063392 -0.297903 ... -0.988013 -0.741213 2.347867 -0.178826 -0.360738 -0.088827 -0.301543 -0.057227 -0.038281 1.713905
2 0.061351 0.263676 0.809646 -0.751101 -0.497418 0.844059 0.678742 -0.570108 0.773254 -0.297903 ... 0.301623 0.042559 0.930495 -0.207871 -0.360738 -0.088827 -0.301543 -0.057227 -1.140614 1.713905
3 0.061351 0.458284 0.032064 -0.054877 0.400766 0.876976 0.678742 -0.456889 0.357829 -0.297903 ... 0.301623 -0.012766 2.089451 -0.178826 -0.360738 -0.088827 -0.301543 -0.057227 -0.038281 1.713905
4 1.465443 -1.244533 -0.971808 1.337571 -0.497418 0.679475 0.394694 -0.570108 -0.387298 -0.297903 ... 0.301623 0.153210 -0.729632 0.489198 -0.360738 -0.088827 2.243060 -0.057227 -1.875504 1.713905
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1454 2.401505 -2.314875 -1.591330 -1.447325 1.298950 -0.044694 -0.646813 -0.570108 -0.965376 -0.297903 ... -2.277648 -2.179665 -0.729632 -0.701628 -0.360738 -0.088827 -0.301543 -0.057227 -0.038281 -1.359958
1455 2.401505 -2.314875 -1.599808 -1.447325 -0.497418 -0.044694 -0.646813 -0.570108 -0.411477 -0.297903 ... -0.988013 -0.861084 -0.729632 -0.353093 -0.360738 -0.088827 -0.301543 -0.057227 -0.773170 -1.359958
1456 -0.874711 4.447740 2.055150 -0.751101 1.298950 -0.373861 0.584059 -0.570108 1.724994 -0.297903 ... 0.301623 0.475939 2.982161 -0.701628 -0.360738 -0.088827 -0.301543 -0.057227 1.064053 -1.359958
1457 0.646389 -0.320147 0.125527 -0.751101 -0.497418 0.679475 0.394694 -0.570108 -0.224645 -0.297903 ... -2.277648 -2.179665 -0.103169 -0.236915 -0.360738 -0.088827 -0.301543 -0.057227 0.329164 -1.359958
1458 0.061351 0.263676 -0.038790 0.641347 -0.497418 0.712392 0.489377 -0.037980 0.700719 -0.297903 ... 1.591258 0.817111 0.758218 -0.004559 -0.360738 -0.088827 -0.301543 -0.057227 1.798942 -1.359958

1459 rows × 34 columns

Encoding ( converting Categorical cloumns in continous)¶

OneHot Encoding¶

In [259]:
X2ts=pd.get_dummies(Xts[cat_ts])
In [260]:
X2ts
Out[260]:
MSZoning_C (all) MSZoning_FV MSZoning_RH MSZoning_RL MSZoning_RM Street_Grvl Street_Pave Alley_Grvl Alley_Pave LotShape_IR1 ... SaleType_ConLw SaleType_New SaleType_Oth SaleType_WD SaleCondition_Abnorml SaleCondition_AdjLand SaleCondition_Alloca SaleCondition_Family SaleCondition_Normal SaleCondition_Partial
0 0 0 1 0 0 0 1 1 0 0 ... 0 0 0 1 0 0 0 0 1 0
1 0 0 0 1 0 0 1 1 0 1 ... 0 0 0 1 0 0 0 0 1 0
2 0 0 0 1 0 0 1 1 0 1 ... 0 0 0 1 0 0 0 0 1 0
3 0 0 0 1 0 0 1 1 0 1 ... 0 0 0 1 0 0 0 0 1 0
4 0 0 0 1 0 0 1 1 0 1 ... 0 0 0 1 0 0 0 0 1 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1454 0 0 0 0 1 0 1 1 0 0 ... 0 0 0 1 0 0 0 0 1 0
1455 0 0 0 0 1 0 1 1 0 0 ... 0 0 0 1 1 0 0 0 0 0
1456 0 0 0 1 0 0 1 1 0 0 ... 0 0 0 1 1 0 0 0 0 0
1457 0 0 0 1 0 0 1 1 0 0 ... 0 0 0 1 0 0 0 0 1 0
1458 0 0 0 1 0 0 1 1 0 0 ... 0 0 0 1 0 0 0 0 1 0

1459 rows × 234 columns

Joining X1ts[Cat] & X2ts[Con]¶

In [261]:
Xnew_ts=X1ts.join(X2ts)
In [262]:
Xnew_ts.shape
Out[262]:
(1459, 268)

Reindexing Xnew_ts¶

In [263]:
Xnew_ts.index=range(0,1459)

Seperating traning dropped columns from testing dataset¶

In [264]:
keep=[]
drop=[]
for i in Xnew_ts.columns:
    if i in Xnew.columns:
        keep.append(i)
    else:
        drop.append(i)
In [265]:
keep
Out[265]:
['MSSubClass',
 'LotArea',
 'OverallQual',
 'OverallCond',
 'YearBuilt',
 'YearRemodAdd',
 'MasVnrArea',
 'BsmtFinSF1',
 'TotalBsmtSF',
 '1stFlrSF',
 '2ndFlrSF',
 'BsmtHalfBath',
 'FullBath',
 'HalfBath',
 'BedroomAbvGr',
 'KitchenAbvGr',
 'Fireplaces',
 'GarageArea',
 'WoodDeckSF',
 '3SsnPorch',
 'PoolArea',
 'MSZoning_C (all)',
 'MSZoning_FV',
 'MSZoning_RM',
 'Alley_Grvl',
 'Alley_Pave',
 'LotShape_IR2',
 'LandContour_Bnk',
 'LandContour_HLS',
 'LandContour_Low',
 'LandContour_Lvl',
 'Utilities_AllPub',
 'LotConfig_Corner',
 'LotConfig_CulDSac',
 'LotConfig_Inside',
 'Neighborhood_Blmngtn',
 'Neighborhood_BrDale',
 'Neighborhood_Crawfor',
 'Neighborhood_Edwards',
 'Neighborhood_NPkVill',
 'Neighborhood_NWAmes',
 'Neighborhood_NoRidge',
 'Neighborhood_NridgHt',
 'Neighborhood_OldTown',
 'Neighborhood_StoneBr',
 'Condition1_Artery',
 'Condition1_PosA',
 'Condition1_RRAe',
 'BldgType_Duplex',
 'BldgType_TwnhsE',
 'HouseStyle_1.5Unf',
 'HouseStyle_SFoyer',
 'HouseStyle_SLvl',
 'RoofMatl_CompShg',
 'Exterior1st_HdBoard',
 'Exterior1st_Plywood',
 'Exterior1st_Stucco',
 'Exterior1st_VinylSd',
 'Exterior1st_Wd Sdng',
 'Exterior1st_WdShing',
 'Exterior2nd_ImStucc',
 'Exterior2nd_Stucco',
 'Exterior2nd_VinylSd',
 'Exterior2nd_Wd Sdng',
 'MasVnrType_Stone',
 'ExterQual_Ex',
 'ExterCond_TA',
 'Foundation_BrkTil',
 'Foundation_Slab',
 'Foundation_Stone',
 'BsmtQual_Ex',
 'BsmtQual_Fa',
 'BsmtCond_Gd',
 'BsmtCond_TA',
 'BsmtExposure_Gd',
 'BsmtExposure_Mn',
 'BsmtExposure_No',
 'BsmtFinType1_GLQ',
 'BsmtFinType1_LwQ',
 'BsmtFinType2_GLQ',
 'Heating_Wall',
 'HeatingQC_Ex',
 'HeatingQC_Fa',
 'HeatingQC_Gd',
 'HeatingQC_TA',
 'CentralAir_N',
 'CentralAir_Y',
 'Electrical_FuseA',
 'KitchenQual_Ex',
 'Functional_Mod',
 'Functional_Typ',
 'FireplaceQu_Po',
 'GarageType_Attchd',
 'GarageFinish_Fin',
 'GarageFinish_RFn',
 'GarageFinish_Unf',
 'GarageQual_Fa',
 'PavedDrive_N',
 'PavedDrive_Y',
 'PoolQC_Gd',
 'Fence_MnPrv',
 'SaleCondition_Abnorml',
 'SaleCondition_AdjLand',
 'SaleCondition_Alloca',
 'SaleCondition_Partial']
In [266]:
drop
Out[266]:
['LotFrontage',
 'BsmtFinSF2',
 'BsmtUnfSF',
 'GrLivArea',
 'BsmtFullBath',
 'TotRmsAbvGrd',
 'GarageYrBlt',
 'GarageCars',
 'OpenPorchSF',
 'EnclosedPorch',
 'ScreenPorch',
 'MoSold',
 'YrSold',
 'MSZoning_RH',
 'MSZoning_RL',
 'Street_Grvl',
 'Street_Pave',
 'LotShape_IR1',
 'LotShape_IR3',
 'LotShape_Reg',
 'LotConfig_FR2',
 'LotConfig_FR3',
 'LandSlope_Gtl',
 'LandSlope_Mod',
 'LandSlope_Sev',
 'Neighborhood_Blueste',
 'Neighborhood_BrkSide',
 'Neighborhood_ClearCr',
 'Neighborhood_CollgCr',
 'Neighborhood_Gilbert',
 'Neighborhood_IDOTRR',
 'Neighborhood_MeadowV',
 'Neighborhood_Mitchel',
 'Neighborhood_NAmes',
 'Neighborhood_SWISU',
 'Neighborhood_Sawyer',
 'Neighborhood_SawyerW',
 'Neighborhood_Somerst',
 'Neighborhood_Timber',
 'Neighborhood_Veenker',
 'Condition1_Feedr',
 'Condition1_Norm',
 'Condition1_PosN',
 'Condition1_RRAn',
 'Condition1_RRNe',
 'Condition1_RRNn',
 'Condition2_Artery',
 'Condition2_Feedr',
 'Condition2_Norm',
 'Condition2_PosA',
 'Condition2_PosN',
 'BldgType_1Fam',
 'BldgType_2fmCon',
 'BldgType_Twnhs',
 'HouseStyle_1.5Fin',
 'HouseStyle_1Story',
 'HouseStyle_2.5Unf',
 'HouseStyle_2Story',
 'RoofStyle_Flat',
 'RoofStyle_Gable',
 'RoofStyle_Gambrel',
 'RoofStyle_Hip',
 'RoofStyle_Mansard',
 'RoofStyle_Shed',
 'RoofMatl_Tar&Grv',
 'RoofMatl_WdShake',
 'RoofMatl_WdShngl',
 'Exterior1st_AsbShng',
 'Exterior1st_AsphShn',
 'Exterior1st_BrkComm',
 'Exterior1st_BrkFace',
 'Exterior1st_CBlock',
 'Exterior1st_CemntBd',
 'Exterior1st_MetalSd',
 'Exterior2nd_AsbShng',
 'Exterior2nd_AsphShn',
 'Exterior2nd_Brk Cmn',
 'Exterior2nd_BrkFace',
 'Exterior2nd_CBlock',
 'Exterior2nd_CmentBd',
 'Exterior2nd_HdBoard',
 'Exterior2nd_MetalSd',
 'Exterior2nd_Plywood',
 'Exterior2nd_Stone',
 'Exterior2nd_Wd Shng',
 'MasVnrType_BrkCmn',
 'MasVnrType_BrkFace',
 'MasVnrType_None',
 'ExterQual_Fa',
 'ExterQual_Gd',
 'ExterQual_TA',
 'ExterCond_Ex',
 'ExterCond_Fa',
 'ExterCond_Gd',
 'ExterCond_Po',
 'Foundation_CBlock',
 'Foundation_PConc',
 'Foundation_Wood',
 'BsmtQual_Gd',
 'BsmtQual_TA',
 'BsmtCond_Fa',
 'BsmtCond_Po',
 'BsmtExposure_Av',
 'BsmtFinType1_ALQ',
 'BsmtFinType1_BLQ',
 'BsmtFinType1_Rec',
 'BsmtFinType1_Unf',
 'BsmtFinType2_ALQ',
 'BsmtFinType2_BLQ',
 'BsmtFinType2_LwQ',
 'BsmtFinType2_Rec',
 'BsmtFinType2_Unf',
 'Heating_GasA',
 'Heating_GasW',
 'Heating_Grav',
 'HeatingQC_Po',
 'Electrical_FuseF',
 'Electrical_FuseP',
 'Electrical_SBrkr',
 'KitchenQual_Fa',
 'KitchenQual_Gd',
 'KitchenQual_TA',
 'Functional_Maj1',
 'Functional_Maj2',
 'Functional_Min1',
 'Functional_Min2',
 'Functional_Sev',
 'FireplaceQu_Ex',
 'FireplaceQu_Fa',
 'FireplaceQu_Gd',
 'FireplaceQu_TA',
 'GarageType_2Types',
 'GarageType_Basment',
 'GarageType_BuiltIn',
 'GarageType_CarPort',
 'GarageType_Detchd',
 'GarageQual_Gd',
 'GarageQual_Po',
 'GarageQual_TA',
 'GarageCond_Ex',
 'GarageCond_Fa',
 'GarageCond_Gd',
 'GarageCond_Po',
 'GarageCond_TA',
 'PavedDrive_P',
 'PoolQC_Ex',
 'Fence_GdPrv',
 'Fence_GdWo',
 'Fence_MnWw',
 'MiscFeature_Gar2',
 'MiscFeature_Othr',
 'MiscFeature_Shed',
 'SaleType_COD',
 'SaleType_CWD',
 'SaleType_Con',
 'SaleType_ConLD',
 'SaleType_ConLI',
 'SaleType_ConLw',
 'SaleType_New',
 'SaleType_Oth',
 'SaleType_WD',
 'SaleCondition_Family',
 'SaleCondition_Normal']
In [267]:
len(Xnew_ts.columns)
Out[267]:
268

Creating Final Xnew testing dataset¶

In [268]:
Xnewtest=Xnew_ts[keep]
In [269]:
len(Xnewtest.columns)
Out[269]:
105
In [270]:
Xnewtest
Out[270]:
MSSubClass LotArea OverallQual OverallCond YearBuilt YearRemodAdd MasVnrArea BsmtFinSF1 TotalBsmtSF 1stFlrSF ... GarageFinish_Unf GarageQual_Fa PavedDrive_N PavedDrive_Y PoolQC_Gd Fence_MnPrv SaleCondition_Abnorml SaleCondition_AdjLand SaleCondition_Alloca SaleCondition_Partial
0 -0.874711 0.363929 -0.751101 0.400766 -0.340945 -1.072885 -0.570108 0.063295 -0.370808 -0.654561 ... 1 0 0 1 0 1 0 0 0 0
1 -0.874711 0.897861 -0.054877 0.400766 -0.439695 -1.214908 0.041273 1.063392 0.639144 0.433298 ... 1 0 0 1 0 1 0 0 0 0
2 0.061351 0.809646 -0.751101 -0.497418 0.844059 0.678742 -0.570108 0.773254 -0.266876 -0.574165 ... 0 0 0 1 0 1 0 0 0 0
3 0.061351 0.032064 -0.054877 0.400766 0.876976 0.678742 -0.456889 0.357829 -0.271395 -0.579190 ... 0 0 0 1 0 1 0 0 0 0
4 1.465443 -0.971808 1.337571 -0.497418 0.679475 0.394694 -0.570108 -0.387298 0.528434 0.310192 ... 0 0 0 1 0 1 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1454 2.401505 -1.591330 -1.447325 1.298950 -0.044694 -0.646813 -0.570108 -0.965376 -1.129968 -1.533893 ... 1 0 0 1 0 1 0 0 0 0
1455 2.401505 -1.599808 -1.447325 -0.497418 -0.044694 -0.646813 -0.570108 -0.411477 -1.129968 -1.533893 ... 1 0 0 1 0 1 1 0 0 0
1456 -0.874711 2.055150 -0.751101 1.298950 -0.373861 0.584059 -0.570108 1.724994 0.401907 0.169499 ... 1 0 0 1 0 1 1 0 0 0
1457 0.646389 0.125527 -0.751101 -0.497418 0.679475 0.394694 -0.570108 -0.224645 -0.303026 -0.468645 ... 1 0 0 1 0 1 0 0 0 0
1458 0.061351 -0.038790 0.641347 -0.497418 0.712392 0.489377 -0.037980 0.700719 -0.113237 -0.403324 ... 0 0 0 1 0 1 0 0 0 0

1459 rows × 105 columns

Xnew testing prediction¶

In [271]:
pred=cvmodel.predict(Xnewtest)
In [272]:
pred
Out[272]:
array([138429.47870899, 181922.3828758 , 202868.94014064, ...,
       196078.94599536, 119756.13433631, 222904.36483644])
In [273]:
predn={'Output':list(pred)}
In [274]:
predn
Out[274]:
{'Output': [138429.47870898744,
  181922.382875797,
  202868.94014064257,
  208819.39668189632,
  196351.0740042466,
  175876.8748191031,
  171840.84378863603,
  162109.3324577118,
  217398.76465224853,
  116522.36785071812,
  184373.8916630015,
  93185.85826060959,
  88185.70322395209,
  139578.42495252047,
  95311.86454471777,
  357983.38620497216,
  254734.1195666814,
  301680.01665617526,
  306996.42685454857,
  442605.7640678651,
  310106.84261126124,
  207064.08962930253,
  183312.26713828428,
  158973.36515351618,
  186585.49616143137,
  197269.7424678579,
  316747.70857901074,
  219800.42414222198,
  203807.83911753917,
  249273.44036505,
  198921.34156240872,
  99194.63319666908,
  203348.54848009563,
  263987.5642483522,
  280159.962987258,
  244068.69101453264,
  177820.69503769226,
  166068.7657410207,
  156801.67012471758,
  148089.5022390159,
  181325.8802817284,
  142317.1505100968,
  226360.77744849224,
  244758.88685854053,
  231131.56173733308,
  199156.6186742538,
  268538.1335615735,
  222324.8131231555,
  162138.21168641123,
  147865.71180125303,
  146438.75466751796,
  172584.88088080438,
  153983.68081041466,
  156370.7181904668,
  211972.84534546096,
  171247.99467263505,
  189413.1907731749,
  134272.3638708547,
  219143.0567658649,
  133762.89815045914,
  150684.8285094407,
  184990.402537329,
  119535.40914610267,
  134221.78617129457,
  123590.4633282915,
  90909.95470532763,
  112838.12401476597,
  142486.47204036222,
  160429.77426453197,
  199608.7289332977,
  109169.74998685948,
  83396.38548939154,
  149091.3835327451,
  131271.42093529052,
  153579.23503106058,
  102235.50550628404,
  53860.10836451892,
  156364.26870319387,
  211481.44623498066,
  108719.0164026559,
  150695.79490344157,
  163970.72051835858,
  191834.96757868605,
  80596.82878425242,
  122254.07337923843,
  132474.39208757563,
  142815.9219598811,
  151159.61927694664,
  119972.29955485958,
  142182.14881957622,
  108491.53768645828,
  155245.73725862362,
  153695.99851778263,
  108133.53792153456,
  184588.43326195696,
  60794.52097317633,
  75710.38502720068,
  99777.5553093862,
  73821.38663859939,
  104414.93058157645,
  122147.67563088138,
  122879.69076955924,
  130091.49846283115,
  160286.84203232254,
  146933.0309892454,
  250336.6095738182,
  86141.62406183744,
  230956.49960938183,
  144653.23504051083,
  137191.67691366264,
  90050.26197831647,
  153773.02627507597,
  273784.75947914005,
  138307.13737741127,
  222827.71578544087,
  252885.28246893443,
  188199.73438389797,
  158048.7793930824,
  138901.86565161435,
  196572.9442078569,
  165699.46258126613,
  128876.36038490187,
  285854.7239019137,
  224590.84940135074,
  132066.41143492877,
  54924.44531429831,
  98031.28408790432,
  169733.77913361852,
  102742.91085200378,
  127403.70141050374,
  93331.99003841201,
  123474.91027639591,
  119167.06204529296,
  160413.28760884542,
  119777.76435210882,
  234823.8029820997,
  216262.09921555477,
  233171.13192139863,
  178949.78977719674,
  164849.40758357957,
  55978.39170065048,
  130950.24707175966,
  54562.795991741354,
  279590.877099331,
  241219.84107461947,
  167972.35788709478,
  190898.14103562332,
  208236.43100193207,
  185953.7579636155,
  153100.9084678339,
  142323.1280684393,
  196230.0070926562,
  182140.34746853687,
  137637.21958531326,
  95502.48712696129,
  70443.81123380053,
  91191.45642384823,
  123703.91548119401,
  152765.61284557925,
  191595.6095689174,
  144391.57636859705,
  150208.82473249768,
  256974.77400157362,
  218200.39607646287,
  137235.0742472443,
  170307.73883919214,
  197027.13091269243,
  285069.38569627923,
  179432.1168146612,
  357424.2379638636,
  227712.00130326877,
  251497.40843438866,
  176670.90446118233,
  188766.5521869203,
  177312.11950813665,
  149254.25481734792,
  207907.86109171086,
  198340.44883383115,
  187906.3923094663,
  254280.51213443454,
  179594.74318961852,
  259109.88434878836,
  223727.1622549576,
  239769.27260167836,
  203931.90421287902,
  152561.87456128723,
  162644.66578562243,
  129423.87709931233,
  135636.29798734438,
  117401.34762603241,
  116920.15903022973,
  89162.73304037763,
  92618.32617502517,
  143337.37781415659,
  119877.35779827306,
  134950.34965314568,
  150674.74943881977,
  140625.37518365044,
  116529.41440579123,
  152617.86830511334,
  417121.3982114038,
  385172.5842433307,
  369909.0447555126,
  450225.91411110427,
  320232.5893589995,
  326459.97796087014,
  388466.8444632839,
  339572.4655591588,
  312381.4255479582,
  342699.58690108114,
  257941.04521642387,
  386944.86256141216,
  291702.3256386048,
  246076.76974353724,
  201576.4024107561,
  203465.93511894246,
  220602.11366330675,
  449677.1857577072,
  374521.1036355196,
  329929.76694164274,
  259400.26174930052,
  316668.70250290836,
  187803.49798851178,
  177448.39292743866,
  172928.37859446314,
  166271.10031468287,
  165655.24247293003,
  189524.9874821328,
  191048.81355442078,
  192700.70885532408,
  181620.4754535121,
  266194.366716956,
  171984.16708441268,
  185740.3849633059,
  164076.14208794097,
  273255.1520550359,
  168359.71833188372,
  329562.5599378822,
  286164.1043581334,
  260216.01817554704,
  272902.8082557513,
  233546.6699808255,
  220947.55058122193,
  261236.13280867116,
  242270.5212373006,
  401674.3816206408,
  227834.4594419559,
  203241.59470907482,
  256352.25288947654,
  228219.56625861264,
  277578.6125249953,
  246933.1575013452,
  279158.2571216811,
  225439.8888477063,
  217121.39137476467,
  177539.8103064139,
  171549.95713128723,
  139144.87639915533,
  221260.93425642047,
  228780.64516216994,
  158549.70562250676,
  121765.48270378279,
  160772.3067795188,
  204364.2773314042,
  244622.09709575563,
  181507.60050579612,
  131515.47953702917,
  164719.53304469993,
  170821.56121789129,
  177715.7068649989,
  110148.45606121433,
  146745.63572795995,
  117681.63536123907,
  127837.64254301152,
  101536.36580517539,
  105730.0084296102,
  220997.93252565473,
  253623.38257031926,
  239534.04243322436,
  210188.24489826924,
  186168.11035863133,
  180727.40397832435,
  178094.9497584813,
  262909.65560435405,
  211440.97824693786,
  191017.92317171933,
  238904.84020179082,
  228828.21913386445,
  153888.92389727966,
  138490.87405815235,
  240729.36363899132,
  109524.59478181666,
  158979.50080283298,
  196415.62516401752,
  179394.39146235888,
  36191.554484188804,
  127186.41558924249,
  150789.5663590951,
  174501.51185627995,
  164556.0819609051,
  155036.0486237989,
  198293.0039812294,
  188553.35783442995,
  114365.25814924361,
  200635.21921313967,
  195910.70281905908,
  226913.0329815361,
  149523.71805309883,
  174874.38865589167,
  159956.00981687655,
  125816.47398612123,
  147887.85265752533,
  122901.43753088116,
  156823.33950441945,
  147866.21487565772,
  132551.01893685586,
  108276.7041415416,
  152519.04618332686,
  134488.57793160313,
  166513.12812966554,
  131578.60639746443,
  74295.00247835179,
  142345.80167591837,
  92466.49326598813,
  137689.35299122255,
  108464.21543218498,
  150135.28932641467,
  34835.372920343376,
  103994.81516461066,
  61481.066456295855,
  217850.0834282764,
  164803.95167191292,
  130967.65279250604,
  161482.8072914051,
  144930.22807076114,
  145033.24190551747,
  129687.77799950617,
  118202.32368768586,
  109027.62243616418,
  118977.7259002813,
  139289.02101185246,
  117899.05567483792,
  156460.37246593976,
  131481.055370663,
  135617.60592516372,
  126060.88076998547,
  147568.93494957974,
  125393.28292628648,
  128506.61495206409,
  135302.3561187263,
  75602.12349310378,
  120470.23521404457,
  129229.83410764945,
  93586.74865827282,
  47351.731221107155,
  102604.9621912511,
  115752.00423070104,
  172941.29247808916,
  140882.51454627473,
  31701.357075515363,
  93471.23554235822,
  151021.66691965752,
  23501.626178135106,
  129881.91012802467,
  147091.60723464526,
  94638.12310308058,
  102398.72114126284,
  93887.43366667931,
  111272.58451057218,
  155800.92130720508,
  158375.50779511002,
  82579.30159071417,
  148643.30036670342,
  118525.70870549903,
  111811.2072560205,
  139114.4262105074,
  67624.57025434995,
  99687.20413948681,
  102484.52220772069,
  126058.81603687396,
  154771.88530741152,
  97322.49875895563,
  147536.08519548827,
  144129.064754354,
  148362.48936351412,
  153554.7051379876,
  171065.25394083132,
  44135.62368854007,
  94393.06614495875,
  120073.46037596733,
  164461.6048772966,
  124105.73295635536,
  107451.9904094457,
  164771.45180038328,
  168266.33612182218,
  221941.29932386393,
  135203.9730744313,
  157360.01691138028,
  92317.29760187627,
  139784.66097539518,
  94047.13307779764,
  318686.5193834384,
  311615.5946641619,
  311682.40370773023,
  351615.9660099276,
  337156.12667625595,
  214954.52946712426,
  291947.06783626537,
  208247.3671271437,
  220111.36776792808,
  264286.9817414906,
  174564.24101345864,
  255857.1739524589,
  139189.13969231953,
  203134.4599162332,
  209515.40850391515,
  217200.6967041128,
  203192.3440496927,
  154905.64513047645,
  131535.43958445368,
  252825.54917552503,
  247857.09649351786,
  191739.81824268468,
  213218.56865698958,
  249585.85653587157,
  288442.4531463239,
  212513.54132519977,
  248667.24939594278,
  172970.50848993319,
  120521.51026439096,
  132366.24486422594,
  100587.37323212912,
  136001.88765997303,
  123220.75171928406,
  140544.51109811375,
  136439.70540813467,
  114373.2775489541,
  112784.12062673259,
  165673.43209732504,
  144622.71998223037,
  195511.05375352397,
  198075.0031315554,
  233779.920474911,
  154405.1601048577,
  203627.85120323152,
  189301.000942624,
  229841.68118760452,
  107211.85634022015,
  122121.56049959129,
  117792.50172657697,
  235063.40112237257,
  317551.90700662765,
  163357.29856745838,
  64401.3148939814,
  297081.2291386338,
  66615.7594304728,
  246466.9939984964,
  142269.06646419273,
  175919.16525024283,
  176357.64850029763,
  381997.72919627576,
  334546.7784078361,
  238143.1607176541,
  215178.0546650195,
  203324.26229819207,
  381870.856924293,
  141732.4088162427,
  177102.52331675967,
  144019.97648989258,
  125331.44541698924,
  144043.47933495112,
  145023.15907325648,
  197659.80071695542,
  185910.84143835123,
  175120.2121262876,
  213190.2541654493,
  187394.75401502143,
  177635.45802284317,
  247867.9720884951,
  191462.12118342664,
  180701.7848374815,
  177031.15511708695,
  227020.32512348716,
  389259.1216015237,
  397932.3195811439,
  170468.79880672763,
  348868.8066709142,
  244516.3938128702,
  249511.26531479193,
  193648.9194404709,
  255455.60519561812,
  213627.03660684236,
  121074.22926144657,
  180936.88707861787,
  134006.82579026514,
  282412.4757696481,
  158582.73587247558,
  268190.0278730225,
  151458.6377440074,
  106143.83297815133,
  119899.91408446952,
  88721.75797738931,
  105645.51454723149,
  106301.10716924605,
  132826.45625320994,
  132117.23334481972,
  302486.8487801247,
  399414.58390756685,
  371497.43785492424,
  399428.3697664675,
  426506.4072786219,
  369524.2544639036,
  281381.2517227469,
  342708.09379677003,
  448011.411757246,
  262165.44382581604,
  341917.29964322166,
  347892.9826849175,
  308603.6281021517,
  207777.87267457935,
  340425.6965231654,
  212046.74880517117,
  201481.11800643516,
  175169.375469058,
  218186.3509201318,
  210722.41931299324,
  202892.9129177427,
  169840.83234344216,
  193760.2880102151,
  210130.57378959894,
  233115.29710737275,
  224395.7190173393,
  169919.27192430926,
  253392.85049975035,
  186397.74010079986,
  240940.05710132763,
  301121.6532435618,
  302672.90274331986,
  290886.79225945123,
  300010.4089910941,
  261205.24270568206,
  250530.84777619893,
  243158.94961150744,
  268512.4980079523,
  233073.26936316266,
  227632.95496251795,
  236170.55191294703,
  227797.30509724413,
  197048.22069375127,
  195149.46044638284,
  138997.49867625418,
  162579.97846102327,
  187033.80034872328,
  192375.8576711573,
  222625.1064614571,
  197998.81265946184,
  197851.7838319025,
  102801.97746827149,
  141552.4304332352,
  55532.898893076825,
  90459.21654457961,
  195130.31010070085,
  147015.38334247548,
  266545.2401400335,
  329347.6940968047,
  172256.8727987007,
  157450.7224540386,
  152767.33149478788,
  175970.19294988236,
  260226.13471168088,
  235015.72628344438,
  256991.33542567567,
  249415.64464620588,
  169584.05604192143,
  234639.726897254,
  194492.715450143,
  198842.569784671,
  289055.05375527823,
  204751.77192982123,
  314419.7860925272,
  289548.2696610029,
  214796.17156038294,
  181602.78928468892,
  178922.58070091018,
  209345.72983640363,
  150508.32041849277,
  156870.76951030293,
  136293.16561239946,
  146504.9216889281,
  173247.47211682628,
  100420.81827066213,
  107620.73795136715,
  153885.25138850842,
  79292.26718545472,
  165266.25002082955,
  129548.77356742919,
  105964.2194289334,
  216508.22481100587,
  133529.6043003186,
  177627.4341555334,
  197063.3799043012,
  138596.32886444483,
  124441.7366675046,
  152021.28175116752,
  132962.52116339584,
  174799.16213099007,
  121674.69034674557,
  161562.1716570269,
  104314.90795296032,
  107523.30877889738,
  87459.75485247246,
  143642.07976490736,
  136365.32306229402,
  176406.88420089477,
  180816.62885967744,
  129360.9603190602,
  154889.00862701,
  124933.84173252151,
  139594.45766026556,
  127748.55504736176,
  134640.71464720246,
  135791.9206654724,
  160798.0328853388,
  113146.07431470668,
  120741.76932282903,
  115001.07769565936,
  119775.40594739861,
  100795.28406637686,
  77510.98608030679,
  122900.27975882885,
  94267.95603702444,
  113552.05531088245,
  142404.8157757382,
  114633.04392102569,
  142179.08032339555,
  68102.99770766006,
  92572.27365070843,
  176031.66365347643,
  27463.570292582765,
  73033.63364150687,
  103310.54646559707,
  115582.19002962255,
  100732.88623145959,
  149340.75729724442,
  139164.3908592772,
  39243.7643981359,
  198730.59021824412,
  129615.88895335408,
  119118.07265919179,
  125587.66275438014,
  149587.26798739142,
  143747.89259715407,
  120013.10277584907,
  109642.50660756079,
  171930.82739334545,
  110963.87287621097,
  162750.81326888292,
  133948.39056645273,
  107609.97588192184,
  105342.54708320387,
  114639.4193811379,
  118442.44963986836,
  62222.79293097454,
  167998.142864837,
  130989.60972149752,
  140895.7427373504,
  179717.1089268608,
  137757.4491642717,
  98118.04928239468,
  153063.5744768213,
  119262.03565062415,
  109509.37424780823,
  126486.11731010658,
  136746.9492993556,
  115607.62748736347,
  52201.875983979815,
  116821.62586100829,
  137858.82524586777,
  146582.76324229754,
  136322.8155009723,
  168382.89295415624,
  128039.05173667427,
  133246.60616895714,
  88925.41270249698,
  145529.2011396953,
  191785.367361772,
  98222.9648727522,
  135951.27906118278,
  144839.76150655164,
  243162.81046060703,
  117683.38232907951,
  198395.3116842562,
  170893.9176379512,
  106059.95625487305,
  147123.31083427925,
  260179.93037898315,
  231963.6266134194,
  246178.20597839844,
  208353.0422652166,
  189787.94111736445,
  230664.10504952128,
  367239.26177103573,
  347600.6509317951,
  236149.56289524218,
  193292.18312942464,
  160231.29993884172,
  231904.46020181436,
  214216.2365085425,
  201084.7724355102,
  228226.03228225478,
  153808.15861286907,
  141078.61636944936,
  165719.81032527453,
  227744.90290441085,
  265943.540832852,
  309101.0199700814,
  246686.391668187,
  215670.11938329745,
  129144.42535349957,
  236933.0058869814,
  198995.2135652522,
  226419.5727668695,
  194290.27690781676,
  120118.75600044765,
  125377.33144371596,
  161339.11710059573,
  146593.20488198515,
  151993.74299561867,
  358838.03174965596,
  76517.03288718266,
  80617.32254347274,
  54253.09490099522,
  138655.02963129786,
  100956.42561142312,
  117552.53047518792,
  107220.3859633188,
  122208.0207407891,
  160275.86185012647,
  182087.67893365287,
  152195.70257860312,
  163391.36275496217,
  203393.29500881518,
  152036.86466486836,
  202160.83784313744,
  145445.89270529497,
  156758.3412808205,
  204475.5373598325,
  261542.0418619536,
  273543.53847063566,
  125081.09996256974,
  117193.67182988526,
  131618.90826507687,
  108869.56210962399,
  120146.5908168799,
  100638.22316088695,
  174244.88574801123,
  79916.65202789471,
  69720.30753985308,
  67886.95508109637,
  55898.147332809196,
  298208.6703841506,
  306177.0740172243,
  297217.924494076,
  217205.2453326026,
  135894.74861289767,
  202011.36677992184,
  188875.43486453258,
  280136.34760651586,
  257847.9665378714,
  153096.38483110061,
  223874.65257338568,
  190667.6876030399,
  198811.27984629606,
  244824.03134519572,
  234458.66232419483,
  253635.8125503247,
  299715.8485883742,
  186619.78223450037,
  120743.40212048538,
  166312.55423084545,
  147894.70512549413,
  130617.69627389591,
  131997.79681041342,
  103411.81287821202,
  85625.6811814573,
  146590.27216757654,
  124092.77806330216,
  132420.00447776794,
  124243.91942108337,
  133617.90068812403,
  178323.2570054396,
  202579.0749474002,
  158886.65512165465,
  175596.1073745667,
  193382.52742491337,
  171123.14491188194,
  223013.47533129097,
  153918.42220266425,
  179122.81645089464,
  151624.71773809323,
  212319.66435746383,
  241242.0896009513,
  381283.9359330543,
  479182.17877912655,
  175550.30013877223,
  322135.72658766655,
  385024.80755039444,
  408743.5884153814,
  154753.1765461965,
  195351.8413910293,
  224039.6181363538,
  183370.83518623828,
  154550.32302384314,
  194121.9821661368,
  175622.61361726664,
  203606.43504716136,
  190159.03319915367,
  154876.18079408281,
  140537.18062877623,
  120550.43694351416,
  152853.3451697714,
  181131.77006385126,
  104155.2905751529,
  120169.61942807105,
  145182.2805324022,
  125150.0782576121,
  367479.3718092297,
  287746.488212383,
  346809.9278893394,
  434706.98678272055,
  342197.8175275039,
  396843.4882134609,
  444101.2732237816,
  386444.01643137727,
  438404.9199889886,
  288714.75208444777,
  370281.99445206055,
  353450.20650876826,
  348906.3911939554,
  327976.438265727,
  326894.83631667425,
  252743.62623753198,
  244397.5157892397,
  255316.3415453778,
  201519.20104408858,
  191381.9344809001,
  199114.2332071724,
  212106.52193880652,
  295570.54769860016,
  211232.24368955934,
  205191.67607183702,
  202784.32132049015,
  176511.44130283588,
  201129.46988652454,
  186614.65495062433,
  202765.58273876208,
  198239.2291869178,
  192067.59900343895,
  190319.295065746,
  185632.6994307412,
  230166.476596409,
  192160.21136947253,
  197726.33273193045,
  181078.3460720583,
  217389.75559984375,
  172348.12683740485,
  203850.2653309182,
  230660.26664576013,
  199766.71811357548,
  191781.92206047967,
  320704.2868973315,
  376235.1042279808,
  309699.4974212103,
  259290.41152462782,
  281772.7518251697,
  304381.8977116484,
  201839.12009329422,
  258787.15084686625,
  211008.39539800707,
  374746.7094474633,
  211608.82737323013,
  222495.3581613931,
  220134.61587973515,
  217313.1926957004,
  219102.97838416422,
  221387.67349143734,
  205342.85812112468,
  252410.12269906886,
  204285.4233414342,
  322020.7183450101,
  273142.71402870107,
  242050.50068421237,
  255468.55009805883,
  148492.84095313374,
  144127.5408312645,
  153247.5320150868,
  185088.12411201233,
  199871.70324813432,
  126955.86288055137,
  106129.60761638152,
  149897.05686032466,
  271041.5481320251,
  144582.6125592436,
  167731.7310654374,
  206694.86956779502,
  184368.668138484,
  212028.7609494142,
  219532.03624580568,
  203459.4321376513,
  169692.12425778847,
  172970.1212817037,
  198974.36579117208,
  287640.61389435205,
  312532.95141256764,
  195914.37621308316,
  281323.7879556769,
  332130.7281520205,
  142751.6979263117,
  223885.54199808784,
  138815.54645441365,
  159389.38432348933,
  195370.6910853684,
  198991.83918230905,
  248306.04641513457,
  160212.2129400335,
  122919.88895824278,
  136914.84717253392,
  107479.889895344,
  113030.35466647967,
  150362.81681228854,
  148889.5587027582,
  122336.57950094432,
  164620.557807036,
  151068.90227306163,
  216189.7160117082,
  140925.09303434246,
  228505.1512761201,
  122000.44195875544,
  49831.4697283868,
  51726.28072019566,
  125967.2042382221,
  123135.27715202054,
  152102.01678111576,
  155352.65094345633,
  149307.73945747394,
  81673.2944211837,
  140303.72893098387,
  153432.1881297267,
  120971.80329693248,
  190708.63634219643,
  114457.96726618822,
  170632.126725676,
  135976.18987463886,
  168042.1068368597,
  142679.35124530862,
  139513.73255853,
  127702.8878664005,
  124937.62037786125,
  134455.272390689,
  123644.98585200396,
  142371.91826926605,
  111282.59856877045,
  122068.34790203122,
  139082.98729349894,
  242283.53441493533,
  127905.68403636581,
  126895.71942160976,
  180008.5945867115,
  107019.52071677276,
  136638.89479005206,
  103445.52247675296,
  148041.81076362042,
  142863.58125913987,
  144392.46575217025,
  165041.10765218805,
  120860.5427660679,
  108197.7015483076,
  116060.33337623032,
  90005.98416568234,
  130092.85491797923,
  96309.43117420176,
  92574.2818126675,
  135116.8110319603,
  130685.24744002648,
  80381.92859908179,
  144815.14270623832,
  188519.4471638814,
  131801.94169477967,
  110510.55403771333,
  171142.66710819592,
  125877.88417970034,
  209734.630134742,
  93170.93858601002,
  120604.92660942512,
  79460.9628062537,
  167291.67860155558,
  130063.78225635845,
  134323.63248711312,
  111884.63069312442,
  139366.1603421296,
  ...]}
In [275]:
len(predn)
Out[275]:
1
In [276]:
ID=df2['Id']
In [277]:
ID
Out[277]:
0       1461
1       1462
2       1463
3       1464
4       1465
        ... 
1454    2915
1455    2916
1456    2917
1457    2918
1458    2919
Name: Id, Length: 1459, dtype: int64
In [278]:
df_final1=pd.DataFrame(ID)
In [279]:
df_final1
Out[279]:
Id
0 1461
1 1462
2 1463
3 1464
4 1465
... ...
1454 2915
1455 2916
1456 2917
1457 2918
1458 2919

1459 rows × 1 columns

In [280]:
df_final2=pd.DataFrame(predn)
In [281]:
df_final2
Out[281]:
Output
0 138429.478709
1 181922.382876
2 202868.940141
3 208819.396682
4 196351.074004
... ...
1454 79205.201930
1455 81236.386079
1456 196078.945995
1457 119756.134336
1458 222904.364836

1459 rows × 1 columns

Final output Dataset¶

In [282]:
df_final=df_final1.join(df_final2)
In [283]:
df_final
Out[283]:
Id Output
0 1461 138429.478709
1 1462 181922.382876
2 1463 202868.940141
3 1464 208819.396682
4 1465 196351.074004
... ... ...
1454 2915 79205.201930
1455 2916 81236.386079
1456 2917 196078.945995
1457 2918 119756.134336
1458 2919 222904.364836

1459 rows × 2 columns